Skip to content

Commit

Permalink
Merge pull request #28 from UbiquitousLearning/develop-lx
Browse files Browse the repository at this point in the history
Develop lx
  • Loading branch information
lx200916 authored Nov 17, 2023
2 parents 40dbbcc + b6933ae commit c22ddb6
Show file tree
Hide file tree
Showing 7 changed files with 669 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Checks: >
# Turn all the warnings from the checks above into errors.
WarningsAsErrors: "*"

HeaderFilterRegex: ".*\.hpp$&!gguf\.hpp"
CheckOptions:
- key: readability-identifier-naming.NamespaceCase
value: lower_case
Expand Down
1 change: 1 addition & 0 deletions .clang-tidy.ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/quantizer/gguf.hpp
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ if(QUANT)
# ${DIR_SRC}
${PROJECT_SOURCE_DIR}/src/ParamLoader.cpp
)
# Converter tool: reads a GGUF model file and rewrites it as an mllm
# parameter file (entry point in src/quantizer/gguf.cpp). Links the same
# quant/quantizer sources as the sibling quantize target above.
add_executable(
from_gguf
${PROJECT_SOURCE_DIR}/src/quantizer/gguf.cpp
${MLLM_QUANT}
${MLLM_QUANTIZER}

# ${DIR_SRC}
${PROJECT_SOURCE_DIR}/src/ParamLoader.cpp
)

endif()

if(TEST)
Expand Down
11 changes: 10 additions & 1 deletion include/Types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ enum DataType {
// MLLM_TYPE_Q3_K = 11,
MLLM_TYPE_Q4_K = 12,
// MLLM_TYPE_Q5_K = 13,
// MLLM_TYPE_Q6_K = 14,
MLLM_TYPE_Q6_K = 14,
MLLM_TYPE_Q8_K = 15,
MLLM_TYPE_I8,
MLLM_TYPE_I16,
Expand Down Expand Up @@ -100,6 +100,15 @@ typedef struct {
static_assert(sizeof(block_q4_K) == 2 * sizeof(mllm_fp16_t) + K_SCALE_SIZE + QK_K / 2, "wrong q4_K block size/padding");
#endif

#pragma pack(1)
typedef struct {
uint8_t ql[QK_K / 2]; // quants, lower 4 bits
uint8_t qh[QK_K / 4]; // quants, upper 2 bits
int8_t scales[QK_K / 16]; // scales, quantized with 8 bits
mllm_fp16_t d; // super-block scale
} block_q6_K;
#pragma pack()

#define QK8_0 32
#pragma pack(1)
typedef struct {
Expand Down
2 changes: 1 addition & 1 deletion src/backends/cpu/CPUSoftMax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ ErrorCode CPUSoftMax::execute(vector<shared_ptr<Tensor>> inputs, vector<shared_p
uint16_t scvt;
for (int i = 0; i < num_classes; i++) {
if (input->dataAt<float>({n, h, s, i}) == -INFINITY) {
dp[i] = 0.0f;
dp[i] = 0.0F;
} else {
mllm_fp16_t tmp = MLLM_FP32_TO_FP16(input->dataAt<float>({n, h, s, i}) - max);
memcpy(&scvt, &tmp, sizeof(scvt));
Expand Down
15 changes: 15 additions & 0 deletions src/quantizer/gguf.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "gguf.hpp"
#include "ParamWriter.hpp"
#include <iostream>
#include <string>
// gguf input_file outfile
int main(int argc, char **argv) {
if (argc != 3) {
std::cerr << "Usage: " << argv[0] << " input_file outfile" << std::endl;
return 1;
}
std::string input_file(argv[1]);
std::string output_file(argv[2]);
auto *writer = new ParamWriter(output_file);
from_gguf(input_file, writer);
return 0;
}
Loading

0 comments on commit c22ddb6

Please sign in to comment.