ggml : faster ggml_conv_2d using 2-stage op (ggerganov#483)

* ggml : fix ggml_conv_2d impl
* ggml : make ggml_conv_2d a little faster
* ggml : reorganize ggml_conv_2d code
* ggml : make ggml_conv_2d faster
* use int64_t in conv_2d stage 0
* ggml : add TODO about im2col

---------

Co-authored-by: Georgi Gerganov <[email protected]>
PABannier · Oct 9, 2023 · 6549d12
1 parent faebeb9
commit 6549d12
Show file tree

Hide file tree

Showing 2 changed files with 356 additions and 89 deletions.
diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
@@ -400,15 +400,16 @@ extern "C" {
         GGML_OP_ALIBI,
         GGML_OP_CLAMP,
         GGML_OP_CONV_1D,
-        GGML_OP_CONV_2D,
+        GGML_OP_CONV_1D_STAGE_0,  // internal
+        GGML_OP_CONV_1D_STAGE_1,  // internal
         GGML_OP_CONV_TRANSPOSE_1D,
+        GGML_OP_CONV_2D,
+        GGML_OP_CONV_2D_STAGE_0, // internal
+        GGML_OP_CONV_2D_STAGE_1, // internal
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
 
-        GGML_OP_CONV_1D_STAGE_0,  // internal
-        GGML_OP_CONV_1D_STAGE_1,  // internal
-
         GGML_OP_UPSCALE, // nearest interpolate
 
         GGML_OP_FLASH_ATTN,
@@ -1016,9 +1017,9 @@ extern "C" {
             struct ggml_tensor  * b,
             float                 eps);
 
-    // A: n columns, m rows
-    // B: n columns, p rows  (i.e. we transpose it internally)
-    // result is m columns, p rows
+    // A: k columns, n rows => [ne03, ne02, n, k]
+    // B: k columns, m rows  (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]
+    // result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
     GGML_API struct ggml_tensor * ggml_mul_mat(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,